While investigating some discrepancies among different datasets a handful of corrections were made. This document is intended to see if any of those changes have important implications for some of the species-specific reports that have been done recently.
The two datasets being compared here are the survdat_Nye_allseasons.RData and the more recent NEFSC_BTS_all_seasons_03032021.RData.
# Cleanup functions
source(here("R/01_nefsc_ss_build.R"))
#### Survdat resource Load Data
# 2019 Data used in 2020 paper
load(paste0(nmfs_path, "Survdat_Nye_allseason.RData"))
survdat_nye <- survdat %>% clean_names()
# Run cleanup
survdat_nye <- survdat_prep(survdat = survdat_nye) %>%
mutate(survdat_source = "survdat_nye")
# 2020 data received last august
load(paste0(res_path, "NMFS_trawl/Survdat_Nye_Aug 2020.RData"))
survdat_20 <- clean_names(survdat)
# Run cleanup
survdat_20 <- survdat_prep(survdat = survdat_20) %>%
mutate(survdat_source = "survdat_nye2020")
# Data we just received in 2021 with errors located and corrected
load(paste0(nmfs_path, "NEFSC_BTS_all_seasons_03032021.RData"))
survdat_21 <- survey$survdat %>% clean_names()
# Run cleanup
survdat_21 <- survdat_prep(survdat = survdat_21) %>%
mutate(survdat_source = "survdat_2021")
rm(survdat, survey)
#### Load the species list
species_check <- read_csv(here("data/andrew_species/Assesmentfishspecies.csv"),
col_types = cols())
species_check <- species_check %>%
clean_names() %>%
mutate(svspp = str_pad(svspp, 3, pad = "0", side = "left"),
comname = tolower(comname),
species = str_to_title(species)) %>%
arrange(svspp)
# Put in a list
source_list <- list("survdat_nye_2019" = survdat_nye,
"survdat_nye_2020" = survdat_20,
"survdat_2021" = survdat_21)
# Make years comparable
# Filter species down for both
source_list <- map(source_list, function(survdat_data){
survdat_data %>%
filter(est_year <= 2017,
svspp %in% species_check$svspp)
})
# Split them each by comname
source_splits <- map(source_list, function(source_data){
source_split <- source_data %>% split(.$comname)
})
# Make Comparisons
# Vector of species and their common names, atleast the common names Andrew used
andrew_species <- species_check$comname %>% setNames(species_check$svspp)
# Pull the species
species_comparisons <- imap(andrew_species, function(species_comname, species_svspp){
# there are some name mismatches, so catch those
in_nye <- species_comname %in% names(source_splits$survdat_nye_2019)
in_20 <- species_comname %in% names(source_splits$survdat_nye_2020)
in_21 <- species_comname %in% names(source_splits$survdat_2021)
in_both <- in_nye & in_21
in_all <- in_nye & in_20 & in_21
if(in_both == FALSE){
return(list("in_nye" = in_nye,
"in_21" = in_21,
"in_all" = in_all,
"data" = data.frame(),
"metrics" = data.frame()))
}
# Pull just that species from the 2019 data
summ_19 <- source_list$survdat_nye_2019 %>%
filter(svspp == species_svspp) %>%
group_by(comname, est_year) %>%
summarise(abund_19 = sum(abundance, na.rm = T),
biom_19 = sum(biom_adj, na.rm = T),
.groups = "keep") %>% ungroup()
# same for 2020 data
summ_20 <- source_list$survdat_nye_2020 %>%
filter(svspp == species_svspp) %>%
group_by(comname, est_year) %>%
summarise(abund_20 = sum(abundance, na.rm = T),
biom_20 = sum(biom_adj, na.rm = T),
.groups = "keep") %>% ungroup()
# and 2021 data
summ_21 <- source_list$survdat_2021 %>%
filter(svspp == species_svspp) %>%
group_by(comname, est_year) %>%
summarise(abund_21 = sum(abundance, na.rm = T),
biom_21 = sum(biom_adj, na.rm = T),
.groups = "keep") %>% ungroup()
# join them for side-by-side data
combined_data <- left_join(summ_19, summ_20, by = c("comname", "est_year")) %>%
left_join(summ_21, by = c("comname", "est_year")) %>%
mutate(abund_change_19to21 = ((abund_21 - abund_19) / abund_19) * 100,
biom_change_19to21 = ((biom_21 - biom_19) / biom_19) * 100,
abund_change_20to21 = ((abund_21 - abund_20) / abund_20) * 100,
biom_change_20to21 = ((biom_21 - biom_20) / biom_20) * 100)
#return(combined_data)
# abunndance correlation
abund_cor_19to21 <- cor(combined_data$abund_21,
combined_data$abund_19,
use = "pairwise.complete.obs")
abund_cor_20to21 <- cor(combined_data$abund_21,
combined_data$abund_20,
use = "pairwise.complete.obs")
# biomass correlation
biomass_cor_19to21 <- cor(combined_data$biom_21,
combined_data$biom_19,
use = "pairwise.complete.obs")
biomass_cor_20to21 <- cor(combined_data$biom_21,
combined_data$biom_20,
use = "pairwise.complete.obs")
# average relative shift
abund_shift_19to21 <- mean(combined_data$abund_change_19to21, na.rm = T)
biom_shift_19to21 <- mean(combined_data$biom_change_19to21, na.rm = T)
abund_shift_20to21 <- mean(combined_data$abund_change_20to21, na.rm = T)
biom_shift_20to21 <- mean(combined_data$biom_change_20to21, na.rm = T)
# put in list to export
list("data" = combined_data,
"metrics" = data.frame(
"comname" = species_comname,
"svspp" = species_svspp,
"abund_corr_19to21" = abund_cor_19to21,
"abund_corr_20to21" = abund_cor_20to21,
"biom_corr_19to21" = biomass_cor_19to21,
"biom_corr_20to21" = biomass_cor_20to21,
"perc_abund_19to21" = abund_shift_19to21,
"perc_abund_20to21" = abund_shift_20to21,
"perc_biom_19to21" = biom_shift_19to21,
"perc_biom_20to21" = biom_shift_20to21)
)
})
# put data and metrics into a table
comparison_data <- map_dfr(species_comparisons, ~.x[["data"]])
comparison_metrics <- map_dfr(species_comparisons, ~.x[["metrics"]])
comparison_metrics %>%
mutate(comname = fct_reorder(comname, abund_corr_19to21, max, .desc = F)) %>%
ggplot(aes(x = abund_corr_19to21, y = comname)) +
geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
geom_point(color = gmri_cols("gmri blue")) +
labs(y = "", x = "Correlation Between Total Annual Abundances")
comparison_metrics %>%
mutate(comname = fct_reorder(comname, perc_abund_19to21, max, .desc = F)) %>%
ggplot(aes(x = perc_abund_19to21, y = comname)) +
geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
geom_point(color = gmri_cols("gmri blue")) +
scale_x_continuous(labels = scales::percent_format()) +
labs(y = "", x = "Avg. Percent Change in Annual Abundance\nMoving from Old Data -> New Data")
# correlation
corr_cutoff <- 0.8
corr_species <- comparison_metrics %>%
filter(abund_corr_19to21 <= corr_cutoff) %>%
mutate(comname = fct_drop(comname)) %>%
pull(comname)
# filter species
corr_sub <- comparison_data %>%
filter(comname %in% corr_species)
# how many species per plot
p1 <- corr_species[1:6]
p2 <- corr_species[-c(1:6)]
# p1
corr_sub %>%
filter(comname %in% p1) %>%
mutate(comname = fct_drop(comname)) %>%
ggplot() +
geom_line(aes(est_year, abund_19, color = "Survdat Nye Allseason 2019")) +
geom_line(aes(est_year, abund_21, color = "Newest Survdat")) +
facet_wrap(~comname, ncol = 1, scales = "free" ) +
scale_y_continuous(labels = scales::comma_format()) +
scale_color_gmri() +
labs(x = "",
y = "Annual Abundance",
title = paste0("Species with Annual Abundance Correlation <= ", corr_cutoff),
color = "Survdat Source",
subtitle = "Subset 1")
# p1
corr_sub %>%
filter(comname %in% p2) %>%
mutate(comname = fct_drop(comname)) %>%
ggplot() +
geom_line(aes(est_year, abund_19, color = "Survdat Nye Allseason 2019")) +
geom_line(aes(est_year, abund_21, color = "Newest Survdat")) +
facet_wrap(~comname, ncol = 1, scales = "free" ) +
scale_y_continuous(labels = scales::comma_format()) +
scale_color_gmri() +
labs(x = "",
y = "Annual Abundance",
title = paste0("Species with Annual Abundance Correlation <= ", corr_cutoff),
color = "Survdat Source",
subtitle = "Subset 2")
# percent change
perc_cutoff <- 25
perc_species <- comparison_metrics %>%
filter(perc_abund_19to21 >= perc_cutoff) %>%
mutate(comname = fct_drop(comname)) %>%
pull(comname)
comparison_data %>%
filter(comname %in% perc_species) %>%
mutate(comname = fct_drop(comname)) %>%
ggplot() +
geom_line(aes(est_year, abund_19, color = "Survdat Nye Allseason 2019")) +
geom_line(aes(est_year, abund_21, color = "Newest Survdat")) +
facet_wrap(~comname, ncol = 1, scales = "free") +
scale_y_continuous(labels = scales::comma_format()) +
scale_color_gmri() +
labs(x = "",
y = "Annual Abundance",
title = paste0("Species with Avg. Shift in Abundance >= ", perc_cutoff, "%"),
color = "Survdat Source")
comparison_metrics %>%
mutate(comname = fct_reorder(comname, biom_corr_19to21, max, .desc = F)) %>%
ggplot(aes(x = biom_corr_19to21, y = comname)) +
geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
geom_point(color = gmri_cols("gmri blue")) +
labs(y = "", x = "Correlation Between Total Annual Biomasses")
comparison_metrics %>%
mutate(comname = fct_reorder(comname, perc_biom_19to21, max, .desc = F)) %>%
ggplot(aes(x = perc_biom_19to21, y = comname)) +
geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
geom_point(color = gmri_cols("gmri blue")) +
labs(y = "", x = "Avg. Percent Change in Annual Biomasses\nMoving from Old Data -> New Data")
# correlation
corr_cutoff <- 0.8
corr_species <- comparison_metrics %>%
filter(biom_corr_19to21 <= corr_cutoff) %>%
mutate(comname = fct_drop(comname)) %>%
pull(comname)
# plot
if(length(corr_species) > 0) {
comparison_data %>%
filter(comname %in% corr_species) %>%
mutate(comname = fct_drop(comname)) %>%
ggplot() +
geom_line(aes(est_year, biom_19, color = "Survdat Nye Allseason 2019")) +
geom_line(aes(est_year, biom_21, color = "Newest Survdat")) +
facet_wrap(~comname, ncol = 1, scales = "free" ) +
scale_y_continuous(labels = scales::comma_format()) +
scale_color_gmri() +
labs(x = "",
y = "Annual Biomass",
title = paste0("Species with Annual Biomass Correlation <= ", corr_cutoff),
color = "Survdat Source")
}
# percent change
perc_cutoff <- 25
perc_species <- comparison_metrics %>%
filter(perc_biom_19to21 >= perc_cutoff) %>%
mutate(comname = fct_drop(comname)) %>%
pull(comname)
comparison_data %>%
filter(comname %in% perc_species) %>%
mutate(comname = fct_drop(comname)) %>%
ggplot() +
geom_line(aes(est_year, biom_19, color = "Survdat Nye Allseason 2019")) +
geom_line(aes(est_year, biom_21, color = "Newest Survdat")) +
facet_wrap(~comname, ncol = 1, scales = "free") +
scale_y_continuous(labels = scales::comma_format()) +
scale_color_gmri() +
labs(x = "",
y = "Annual Biomass",
title = paste0("Species with Avg. Shift in Biomass >= ", perc_cutoff, "%"),
color = "Survdat Source")
comparison_metrics %>%
mutate(comname = fct_reorder(comname, abund_corr_20to21, max, .desc = F)) %>%
ggplot(aes(x = abund_corr_20to21, y = comname)) +
geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
geom_point(color = gmri_cols("gmri blue")) +
labs(y = "", x = "Correlation Between Total Annual Abundances")
comparison_metrics %>%
mutate(comname = fct_reorder(comname, perc_abund_20to21, max, .desc = F)) %>%
ggplot(aes(x = perc_abund_20to21, y = comname)) +
geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
geom_point(color = gmri_cols("gmri blue")) +
scale_x_continuous(labels = scales::percent_format()) +
labs(y = "", x = "Avg. Percent Change in Annual Abundance\nMoving from Old Data -> New Data")
# correlation
corr_cutoff <- 0.8
corr_species <- comparison_metrics %>%
filter(abund_corr_20to21 <= corr_cutoff) %>%
mutate(comname = fct_drop(comname)) %>%
pull(comname)
# filter species
corr_sub <- comparison_data %>%
filter(comname %in% corr_species)
# how many species per plot
p1 <- corr_species[1:6]
#p2 <- corr_species[-c(1:6)]
# p1
corr_sub %>%
filter(comname %in% p1) %>%
mutate(comname = fct_drop(comname)) %>%
ggplot() +
geom_line(aes(est_year, abund_20, color = "Survdat Nye - August 2020")) +
geom_line(aes(est_year, abund_21, color = "Newest Survdat")) +
facet_wrap(~comname, ncol = 1, scales = "free" ) +
scale_y_continuous(labels = scales::comma_format()) +
scale_color_gmri() +
labs(x = "",
y = "Annual Abundance",
title = paste0("Species with Annual Abundance Correlation <= ", corr_cutoff),
color = "Survdat Source",
subtitle = "Subset 1")
# percent change
perc_cutoff <- 25
perc_species <- comparison_metrics %>%
filter(perc_abund_20to21 >= perc_cutoff) %>%
mutate(comname = fct_drop(comname)) %>%
pull(comname)
comparison_data %>%
filter(comname %in% perc_species) %>%
mutate(comname = fct_drop(comname)) %>%
ggplot() +
geom_line(aes(est_year, abund_20, color = "Survdat Nye - August 2020")) +
geom_line(aes(est_year, abund_21, color = "Newest Survdat")) +
facet_wrap(~comname, ncol = 1, scales = "free") +
scale_y_continuous(labels = scales::comma_format()) +
scale_color_gmri() +
labs(x = "",
y = "Annual Abundance",
title = paste0("Species with Avg. Shift in Abundance >= ", perc_cutoff, "%"),
color = "Survdat Source")
comparison_metrics %>%
mutate(comname = fct_reorder(comname, biom_corr_20to21, max, .desc = F)) %>%
ggplot(aes(x = biom_corr_20to21, y = comname)) +
geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
geom_point(color = gmri_cols("gmri blue")) +
labs(y = "", x = "Correlation Between Total Annual Biomasses")
comparison_metrics %>%
mutate(comname = fct_reorder(comname, perc_biom_20to21, max, .desc = F)) %>%
ggplot(aes(x = perc_biom_20to21, y = comname)) +
geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
geom_point(color = gmri_cols("gmri blue")) +
labs(y = "", x = "Avg. Percent Change in Annual Biomasses\nMoving from Old Data -> New Data")
# correlation
corr_cutoff <- 0.8
corr_species <- comparison_metrics %>%
filter(biom_corr_20to21 <= corr_cutoff) %>%
mutate(comname = fct_drop(comname)) %>%
pull(comname)
# plot
if(length(corr_species) > 0){
comparison_data %>%
filter(comname %in% corr_species) %>%
mutate(comname = fct_drop(comname)) %>%
ggplot() +
geom_line(aes(est_year, biom_20, color = "Survdat Nye - August 2020")) +
geom_line(aes(est_year, biom_21, color = "Newest Survdat")) +
facet_wrap(~comname, ncol = 1, scales = "free" ) +
scale_y_continuous(labels = scales::comma_format()) +
scale_color_gmri() +
labs(x = "",
y = "Annual Biomass",
title = paste0("Species with Annual Biomass Correlation <= ", corr_cutoff),
color = "Survdat Source")
}
# percent change
perc_cutoff <- 25
perc_species <- comparison_metrics %>%
filter(perc_biom_20to21 >= perc_cutoff) %>%
mutate(comname = fct_drop(comname)) %>%
pull(comname)
comparison_data %>%
filter(comname %in% perc_species) %>%
mutate(comname = fct_drop(comname)) %>%
ggplot() +
geom_line(aes(est_year, biom_20, color = "Survdat Nye - August 2020")) +
geom_line(aes(est_year, biom_21, color = "Newest Survdat")) +
facet_wrap(~comname, ncol = 1, scales = "free") +
scale_y_continuous(labels = scales::comma_format()) +
scale_color_gmri() +
labs(x = "",
y = "Annual Biomass",
title = paste0("Species with Avg. Shift in Biomass >= ", perc_cutoff, "%"),
color = "Survdat Source")
A work by Adam A. Kemberling
Akemberling@gmri.org